bitkeeper revision 1.1389.15.4 (4280e2e1TW-3Y8iE13utT8fyuaozWA)

author kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>

Tue, 10 May 2005 16:35:45 +0000 (16:35 +0000)

committer kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>

Tue, 10 May 2005 16:35:45 +0000 (16:35 +0000)
author kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Tue, 10 May 2005 16:35:45 +0000 (16:35 +0000)
committer kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Tue, 10 May 2005 16:35:45 +0000 (16:35 +0000)
diff --git a/xen/arch/ia64/irq.c b/xen/arch/ia64/irq.c

index 2ce076e324b9d3342328ec7f35ddff741a9fef46..1b4e277343d678cdd38b2b53bf4c6f085280a224 100644 (file)
--- a/xen/arch/ia64/irq.c
+++ b/xen/arch/ia64/irq.c
@@ -1468,6 +1468,29 @@ int pirq_guest_unbind(struct domain *d, int irq)
      spin_unlock_irqrestore(&desc->lock, flags);    
      return 0;
  }
+
+/* Return 1 if 'irq' can be bound now; will_share != 0 permits sharing. */
+int pirq_guest_bindable(int irq, int will_share)
+{
+    irq_desc_t         *desc = &irq_desc[irq];
+    irq_guest_action_t *action;
+    unsigned long       flags;
+    int                 okay;
+
+    spin_lock_irqsave(&desc->lock, flags);
+    action = (irq_guest_action_t *)desc->action;
+    /*
+     * Bindable if not currently bound (1), or if guest-bound, shareable (2)
+     * and not at its share limit (3). 'action' is only dereferenced when
+     * IRQ_GUEST is set and it is non-NULL.
+     */
+    okay = !(desc->status & IRQ_GUEST) ? (action == NULL) :      /* 1 */
+        ((action != NULL) && action->shareable && will_share &&  /* 2 */
+         (action->nr_guests != IRQ_MAX_GUESTS));                 /* 3 */
+
+    spin_unlock_irqrestore(&desc->lock, flags);
+    return okay;
+}
  #endif
  
  #ifdef XEN
diff --git a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c

index ad3d043138b1e6300b25c697c2cb6e8c2eb2e0aa..7237d5e7e25b5dc589fdb05a312efe1d95a5deb6 100644 (file)
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -447,6 +447,44 @@ acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
  
  #endif /* CONFIG_ACPI_BUS */
  
+int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
+{
+#ifdef CONFIG_X86_IO_APIC
+       if (use_pci_vector() && !platform_legacy_irq(gsi))
+               *irq = IO_APIC_VECTOR(gsi);
+       else
+#endif
+               *irq = gsi;
+       return 0;
+}
+
+unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low)
+{
+       unsigned int irq;
+       unsigned int plat_gsi = gsi;
+
+#ifdef CONFIG_PCI
+       /*
+        * Make sure all (legacy) PCI IRQs are set as level-triggered.
+        */
+       if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
+               extern void eisa_set_level_irq(unsigned int irq);
+
+               if (edge_level == ACPI_LEVEL_SENSITIVE)
+                               eisa_set_level_irq(gsi);
+       }
+#endif
+
+#ifdef CONFIG_X86_IO_APIC
+       if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
+               plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low);
+       }
+#endif
+       acpi_gsi_to_irq(plat_gsi, &irq);
+       return irq;
+}
+EXPORT_SYMBOL(acpi_register_gsi);
+
  /*
   *  ACPI based hotplug support for CPU
   */
@@ -818,6 +856,10 @@ acpi_boot_table_init(void)
                 return error;
         }
  
+#if 0 /*def __i386__*/
+       check_acpi_pci();
+#endif
+
         acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
  
         /*
diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c

index 2b485a1fe11b03c8883e2516aace5c06e693a27f..3e7c89cf0e1745829cdaa30332fad99b8e4efc48 100644 (file)
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -64,8 +64,12 @@ static struct irq_pin_list {
  } irq_2_pin[PIN_MAP_SIZE];
  
  int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
+#ifdef CONFIG_PCI_MSI
  #define vector_to_irq(vector)  \
         (platform_legacy_irq(vector) ? vector : vector_irq[vector])
+#else
+#define vector_to_irq(vector)  (vector)
+#endif
  
  /*
   * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -141,16 +145,16 @@ static void __unmask_IO_APIC_irq (unsigned int irq)
         __modify_IO_APIC_irq(irq, 0, 0x00010000);
  }
  
-/* trigger = 0 */
-static void __edge_IO_APIC_irq (unsigned int irq)
+/* mask = 1, trigger = 0 */
+static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
  {
-       __modify_IO_APIC_irq(irq, 0, 0x00008000);
+       __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
  }
  
-/* trigger = 1 */
-static void __level_IO_APIC_irq (unsigned int irq)
+/* mask = 0, trigger = 1 */
+static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
  {
-       __modify_IO_APIC_irq(irq, 0x00008000, 0);
+       __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
  }
  
  static void mask_IO_APIC_irq (unsigned int irq)
@@ -227,6 +231,423 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
         spin_unlock_irqrestore(&ioapic_lock, flags);
  }
  
+#if defined(CONFIG_IRQBALANCE)
+# include <asm/processor.h>    /* kernel_thread() */
+# include <xen/kernel_stat.h>  /* kstat */
+# include <xen/slab.h>         /* kmalloc() */
+# include <xen/timer.h>        /* time_after() */
+ 
+# ifdef CONFIG_BALANCED_IRQ_DEBUG
+#  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
+#  define Dprintk(x...) do { TDprintk(x); } while (0)
+# else
+#  define TDprintk(x...) 
+#  define Dprintk(x...) 
+# endif
+
+cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
+
+#define IRQBALANCE_CHECK_ARCH -999
+static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
+static int physical_balance = 0;
+
+struct irq_cpu_info {
+       unsigned long * last_irq;
+       unsigned long * irq_delta;
+       unsigned long irq;
+} irq_cpu_data[NR_CPUS];
+
+#define CPU_IRQ(cpu)           (irq_cpu_data[cpu].irq)
+#define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
+#define IRQ_DELTA(cpu,irq)     (irq_cpu_data[cpu].irq_delta[irq])
+
+#define IDLE_ENOUGH(cpu,now) \
+               (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
+
+#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
+
+#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
+
+#define MAX_BALANCED_IRQ_INTERVAL      (5*HZ)
+#define MIN_BALANCED_IRQ_INTERVAL      (HZ/2)
+#define BALANCED_IRQ_MORE_DELTA                (HZ/10)
+#define BALANCED_IRQ_LESS_DELTA                (HZ)
+
+long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
+
+static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
+                       unsigned long now, int direction)
+{
+       int search_idle = 1;
+       int cpu = curr_cpu;
+
+       goto inside;
+
+       do {
+               if (unlikely(cpu == curr_cpu))
+                       search_idle = 0;
+inside:
+               if (direction == 1) {
+                       cpu++;
+                       if (cpu >= NR_CPUS)
+                               cpu = 0;
+               } else {
+                       cpu--;
+                       if (cpu == -1)
+                               cpu = NR_CPUS-1;
+               }
+       } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
+                       (search_idle && !IDLE_ENOUGH(cpu,now)));
+
+       return cpu;
+}
+
+static inline void balance_irq(int cpu, int irq)
+{
+       unsigned long now = jiffies;
+       cpumask_t allowed_mask;
+       unsigned int new_cpu;
+               
+       if (irqbalance_disabled)
+               return; 
+
+       cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
+       new_cpu = move(cpu, allowed_mask, now, 1);
+       if (cpu != new_cpu) {
+               irq_desc_t *desc = irq_desc + irq;
+               unsigned long flags;
+
+               spin_lock_irqsave(&desc->lock, flags);
+               pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
+               spin_unlock_irqrestore(&desc->lock, flags);
+       }
+}
+
+static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
+{
+       int i, j;
+       Dprintk("Rotating IRQs among CPUs.\n");
+       for (i = 0; i < NR_CPUS; i++) {
+               for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
+                       if (!irq_desc[j].action)
+                               continue;
+                       /* Is it a significant load ?  */
+                       if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
+                                               useful_load_threshold)
+                               continue;
+                       balance_irq(i, j);
+               }
+       }
+       balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
+               balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);       
+       return;
+}
+
+static void do_irq_balance(void)
+{
+       int i, j;
+       unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
+       unsigned long move_this_load = 0;
+       int max_loaded = 0, min_loaded = 0;
+       int load;
+       unsigned long useful_load_threshold = balanced_irq_interval + 10;
+       int selected_irq;
+       int tmp_loaded, first_attempt = 1;
+       unsigned long tmp_cpu_irq;
+       unsigned long imbalance = 0;
+       cpumask_t allowed_mask, target_cpu_mask, tmp;
+
+       for (i = 0; i < NR_CPUS; i++) {
+               int package_index;
+               CPU_IRQ(i) = 0;
+               if (!cpu_online(i))
+                       continue;
+               package_index = CPU_TO_PACKAGEINDEX(i);
+               for (j = 0; j < NR_IRQS; j++) {
+                       unsigned long value_now, delta;
+                       /* Is this an active IRQ? */
+                       if (!irq_desc[j].action)
+                               continue;
+                       if ( package_index == i )
+                               IRQ_DELTA(package_index,j) = 0;
+                       /* Determine the total count per processor per IRQ */
+                       value_now = (unsigned long) kstat_cpu(i).irqs[j];
+
+                       /* Determine the activity per processor per IRQ */
+                       delta = value_now - LAST_CPU_IRQ(i,j);
+
+                       /* Update last_cpu_irq[][] for the next time */
+                       LAST_CPU_IRQ(i,j) = value_now;
+
+                       /* Ignore IRQs whose rate is less than the clock */
+                       if (delta < useful_load_threshold)
+                               continue;
+                       /* update the load for the processor or package total */
+                       IRQ_DELTA(package_index,j) += delta;
+
+                       /* Keep track of the higher numbered sibling as well */
+                       if (i != package_index)
+                               CPU_IRQ(i) += delta;
+                       /*
+                        * We have sibling A and sibling B in the package
+                        *
+                        * cpu_irq[A] = load for cpu A + load for cpu B
+                        * cpu_irq[B] = load for cpu B
+                        */
+                       CPU_IRQ(package_index) += delta;
+               }
+       }
+       /* Find the least loaded processor package */
+       for (i = 0; i < NR_CPUS; i++) {
+               if (!cpu_online(i))
+                       continue;
+               if (i != CPU_TO_PACKAGEINDEX(i))
+                       continue;
+               if (min_cpu_irq > CPU_IRQ(i)) {
+                       min_cpu_irq = CPU_IRQ(i);
+                       min_loaded = i;
+               }
+       }
+       max_cpu_irq = ULONG_MAX;
+
+tryanothercpu:
+       /* Look for heaviest loaded processor.
+        * We may come back to get the next heaviest loaded processor.
+        * Skip processors with trivial loads.
+        */
+       tmp_cpu_irq = 0;
+       tmp_loaded = -1;
+       for (i = 0; i < NR_CPUS; i++) {
+               if (!cpu_online(i))
+                       continue;
+               if (i != CPU_TO_PACKAGEINDEX(i))
+                       continue;
+               if (max_cpu_irq <= CPU_IRQ(i)) 
+                       continue;
+               if (tmp_cpu_irq < CPU_IRQ(i)) {
+                       tmp_cpu_irq = CPU_IRQ(i);
+                       tmp_loaded = i;
+               }
+       }
+
+       if (tmp_loaded == -1) {
+        /* A small number of heavy interrupt sources may be loading
+         * some of the cpus too much. In that case we use Ingo's
+         * original approach and rotate them around.
+         */
+               if (!first_attempt && imbalance >= useful_load_threshold) {
+                       rotate_irqs_among_cpus(useful_load_threshold);
+                       return;
+               }
+               goto not_worth_the_effort;
+       }
+       
+       first_attempt = 0;              /* heaviest search */
+       max_cpu_irq = tmp_cpu_irq;      /* load */
+       max_loaded = tmp_loaded;        /* processor */
+       imbalance = (max_cpu_irq - min_cpu_irq) / 2;
+       
+       Dprintk("max_loaded cpu = %d\n", max_loaded);
+       Dprintk("min_loaded cpu = %d\n", min_loaded);
+       Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
+       Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
+       Dprintk("load imbalance = %lu\n", imbalance);
+
+       /* if imbalance is less than approx 10% of max load, then
+        * observe diminishing returns action. - quit
+        */
+       if (imbalance < (max_cpu_irq >> 3)) {
+               Dprintk("Imbalance too trivial\n");
+               goto not_worth_the_effort;
+       }
+
+tryanotherirq:
+       /* if we select an IRQ to move that can't go where we want, then
+        * see if there is another one to try.
+        */
+       move_this_load = 0;
+       selected_irq = -1;
+       for (j = 0; j < NR_IRQS; j++) {
+               /* Is this an active IRQ? */
+               if (!irq_desc[j].action)
+                       continue;
+               if (imbalance <= IRQ_DELTA(max_loaded,j))
+                       continue;
+               /* Try to find the IRQ that is closest to the imbalance
+                * without going over.
+                */
+               if (move_this_load < IRQ_DELTA(max_loaded,j)) {
+                       move_this_load = IRQ_DELTA(max_loaded,j);
+                       selected_irq = j;
+               }
+       }
+       if (selected_irq == -1) {
+               goto tryanothercpu;
+       }
+
+       imbalance = move_this_load;
+       
+       /* For the physical_balance case, we accumulated both load
+        * values in one of the siblings' cpu_irq[] slots,
+        * to use the same code for physical and logical processors
+        * as much as possible.
+        *
+        * NOTE: the cpu_irq[] array holds the sum of the load for
+        * sibling A and sibling B in the slot for the lowest numbered
+        * sibling (A), _AND_ the load for sibling B in the slot for
+        * the higher numbered sibling.
+        *
+        * We seek the least loaded sibling by making the comparison
+        * (A+B)/2 vs B
+        */
+       load = CPU_IRQ(min_loaded) >> 1;
+       for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
+               if (load > CPU_IRQ(j)) {
+                       /* This won't change cpu_sibling_map[min_loaded] */
+                       load = CPU_IRQ(j);
+                       min_loaded = j;
+               }
+       }
+
+       cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
+       target_cpu_mask = cpumask_of_cpu(min_loaded);
+       cpus_and(tmp, target_cpu_mask, allowed_mask);
+
+       if (!cpus_empty(tmp)) {
+               irq_desc_t *desc = irq_desc + selected_irq;
+               unsigned long flags;
+
+               Dprintk("irq = %d moved to cpu = %d\n",
+                               selected_irq, min_loaded);
+               /* mark for change destination */
+               spin_lock_irqsave(&desc->lock, flags);
+               pending_irq_balance_cpumask[selected_irq] =
+                                       cpumask_of_cpu(min_loaded);
+               spin_unlock_irqrestore(&desc->lock, flags);
+               /* Since we made a change, come back sooner to 
+                * check for more variation.
+                */
+               balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
+                       balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);       
+               return;
+       }
+       goto tryanotherirq;
+
+not_worth_the_effort:
+       /*
+        * if we did not find an IRQ to move, then adjust the time interval
+        * upward
+        */
+       balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
+               balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);       
+       Dprintk("IRQ worth rotating not found\n");
+       return;
+}
+
+static int balanced_irq(void *unused)
+{
+       int i;
+       unsigned long prev_balance_time = jiffies;
+       long time_remaining = balanced_irq_interval;
+
+       daemonize("kirqd");
+       
+       /* push everything to CPU 0 to give us a starting point.  */
+       for (i = 0 ; i < NR_IRQS ; i++) {
+               pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
+       }
+
+       for ( ; ; ) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               time_remaining = schedule_timeout(time_remaining);
+               try_to_freeze(PF_FREEZE);
+               if (time_after(jiffies,
+                               prev_balance_time+balanced_irq_interval)) {
+                       do_irq_balance();
+                       prev_balance_time = jiffies;
+                       time_remaining = balanced_irq_interval;
+               }
+       }
+       return 0;
+}
+
+static int __init balanced_irq_init(void)
+{
+       int i;
+       struct cpuinfo_x86 *c;
+       cpumask_t tmp;
+
+       cpus_shift_right(tmp, cpu_online_map, 2);
+        c = &boot_cpu_data;
+       /* When not overwritten by the command line ask subarchitecture. */
+       if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
+               irqbalance_disabled = NO_BALANCE_IRQ;
+       if (irqbalance_disabled)
+               return 0;
+       
+        /* disable irqbalance completely if there is only one processor online */
+       if (num_online_cpus() < 2) {
+               irqbalance_disabled = 1;
+               return 0;
+       }
+       /*
+        * Enable physical balance only if more than 1 physical processor
+        * is present
+        */
+       if (smp_num_siblings > 1 && !cpus_empty(tmp))
+               physical_balance = 1;
+
+       for (i = 0; i < NR_CPUS; i++) {
+               if (!cpu_online(i))
+                       continue;
+               irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
+               irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
+               if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
+                       printk(KERN_ERR "balanced_irq_init: out of memory");
+                       goto failed;
+               }
+               memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
+               memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
+       }
+       
+       printk(KERN_INFO "Starting balanced_irq\n");
+       if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
+               return 0;
+       else 
+               printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
+failed:
+       for (i = 0; i < NR_CPUS; i++) {
+               if(irq_cpu_data[i].irq_delta)
+                       kfree(irq_cpu_data[i].irq_delta);
+               if(irq_cpu_data[i].last_irq)
+                       kfree(irq_cpu_data[i].last_irq);
+       }
+       return 0;
+}
+
+int __init irqbalance_disable(char *str)
+{
+       irqbalance_disabled = 1;
+       return 0;
+}
+
+__setup("noirqbalance", irqbalance_disable);
+
+static inline void move_irq(int irq)
+{
+       /* note - we hold the desc->lock */
+       if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
+               set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
+               cpus_clear(pending_irq_balance_cpumask[irq]);
+       }
+}
+
+late_initcall(balanced_irq_init);
+
+#else /* !CONFIG_IRQBALANCE */
+static inline void move_irq(int irq) { }
+#endif /* CONFIG_IRQBALANCE */
+
  #ifndef CONFIG_SMP
  void fastcall send_IPI_self(int vector)
  {
@@ -1188,6 +1609,7 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq)
   */
  static void ack_edge_ioapic_irq(unsigned int irq)
  {
+       move_irq(irq);
         if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
                                         == (IRQ_PENDING | IRQ_DISABLED))
                 mask_IO_APIC_irq(irq);
@@ -1215,13 +1637,12 @@ static unsigned int startup_level_ioapic_irq (unsigned int irq)
         return 0; /* don't check for pending */
  }
  
-static void mask_and_ack_level_ioapic_irq (unsigned int irq)
+static void end_level_ioapic_irq (unsigned int irq)
  {
         unsigned long v;
         int i;
  
-       mask_IO_APIC_irq(irq);
-
+       move_irq(irq);
  /*
   * It appears there is an erratum which affects at least version 0x11
   * of I/O APIC (that's the 82093AA and cores integrated into various
@@ -1250,17 +1671,13 @@ static void mask_and_ack_level_ioapic_irq (unsigned int irq)
         if (!(v & (1 << (i & 0x1f)))) {
                 atomic_inc(&irq_mis_count);
                 spin_lock(&ioapic_lock);
-               __edge_IO_APIC_irq(irq);
-               __level_IO_APIC_irq(irq);
+               __mask_and_edge_IO_APIC_irq(irq);
+               __unmask_and_level_IO_APIC_irq(irq);
                 spin_unlock(&ioapic_lock);
         }
  }
  
-static void end_level_ioapic_irq (unsigned int irq)
-{
-       unmask_IO_APIC_irq(irq);
-}
-
+#ifdef CONFIG_PCI_MSI
  static unsigned int startup_edge_ioapic_vector(unsigned int vector)
  {
         int irq = vector_to_irq(vector);
@@ -1282,13 +1699,6 @@ static unsigned int startup_level_ioapic_vector (unsigned int vector)
         return startup_level_ioapic_irq (irq);
  }
  
-static void mask_and_ack_level_ioapic_vector (unsigned int vector)
-{
-       int irq = vector_to_irq(vector);
-
-       mask_and_ack_level_ioapic_irq(irq);
-}
-
  static void end_level_ioapic_vector (unsigned int vector)
  {
         int irq = vector_to_irq(vector);
@@ -1317,11 +1727,7 @@ static void set_ioapic_affinity_vector (unsigned int vector,
  
         set_ioapic_affinity_irq(irq, cpu_mask);
  }
-
-static void noop_ioapic_vector(unsigned int vector)
-{
-       /* nothing */
-}
+#endif
  
  /*
   * Level and edge triggered IO-APIC interrupts need different handling,
@@ -1333,24 +1739,24 @@ static void noop_ioapic_vector(unsigned int vector)
   */
  static struct hw_interrupt_type ioapic_edge_type = {
         .typename       = "IO-APIC-edge",
-       .startup        = startup_edge_ioapic_vector,
-       .shutdown       = noop_ioapic_vector,
-       .enable         = unmask_IO_APIC_vector,
-       .disable        = noop_ioapic_vector,
-       .ack            = ack_edge_ioapic_vector,
-       .end            = noop_ioapic_vector,
-       .set_affinity   = set_ioapic_affinity_vector,
+       .startup        = startup_edge_ioapic,
+       .shutdown       = shutdown_edge_ioapic,
+       .enable         = enable_edge_ioapic,
+       .disable        = disable_edge_ioapic,
+       .ack            = ack_edge_ioapic,
+       .end            = end_edge_ioapic,
+       .set_affinity   = set_ioapic_affinity,
  };
  
  static struct hw_interrupt_type ioapic_level_type = {
         .typename       = "IO-APIC-level",
-       .startup        = startup_level_ioapic_vector,
-       .shutdown       = mask_IO_APIC_vector,
-       .enable         = unmask_IO_APIC_vector,
-       .disable        = mask_IO_APIC_vector,
-       .ack            = mask_and_ack_level_ioapic_vector,
-       .end            = end_level_ioapic_vector,
-       .set_affinity   = set_ioapic_affinity_vector,
+       .startup        = startup_level_ioapic,
+       .shutdown       = shutdown_level_ioapic,
+       .enable         = enable_level_ioapic,
+       .disable        = disable_level_ioapic,
+       .ack            = mask_and_ack_level_ioapic,
+       .end            = end_level_ioapic,
+       .set_affinity   = set_ioapic_affinity,
  };
  
  static inline void init_IO_APIC_traps(void)
@@ -1850,7 +2256,8 @@ int ioapic_guest_write(int apicid, int address, u32 val)
              return 0;
  
          /* Set the correct irq-handling type. */
-        ioapic_register_intr(irq, rte.vector, rte.trigger);
+        irq_desc[irq].handler = rte.trigger ? 
+            &ioapic_level_type: &ioapic_edge_type;
  
          /* Record the pin<->irq mapping. */
          for ( entry = &irq_2_pin[irq]; ; entry = &irq_2_pin[entry->next] )
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c

index 784528ae74ed524106aecd0ebaf07758ccd5fa3a..aaaff647cebaa201e2a006b81b2784f207549c66 100644 (file)
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -186,30 +186,26 @@ typedef struct {
      struct exec_domain *guest[IRQ_MAX_GUESTS];
  } irq_guest_action_t;
  
-extern int vector_irq[];
-
  static void __do_IRQ_guest(int irq)
  {
      irq_desc_t         *desc = &irq_desc[irq];
      irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
      struct exec_domain *ed;
-    int                 i, pirq;
-
-    pirq = platform_legacy_irq(irq) ? irq : vector_irq[irq];
+    int                 i;
  
      for ( i = 0; i < action->nr_guests; i++ )
      {
          ed = action->guest[i];
-        if ( !test_and_set_bit(pirq, &ed->domain->pirq_mask) )
+        if ( !test_and_set_bit(irq, &ed->domain->pirq_mask) )
              action->in_flight++;
-        send_guest_pirq(ed, pirq);
+        send_guest_pirq(ed, irq);
      }
  }
  
  int pirq_guest_unmask(struct domain *d)
  {
      irq_desc_t    *desc;
-    unsigned int   i, j, pirq, vector;
+    unsigned int   i, j, pirq;
      u32            m;
      shared_info_t *s = d->shared_info;
  
@@ -221,13 +217,12 @@ int pirq_guest_unmask(struct domain *d)
              j = find_first_set_bit(m);
              m &= ~(1 << j);
              pirq = (i << 5) + j;
-            vector = platform_legacy_irq(pirq) ? pirq : IO_APIC_VECTOR(pirq);
-            desc = &irq_desc[vector];
+            desc = &irq_desc[pirq];
              spin_lock_irq(&desc->lock);
              if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) &&
                   test_and_clear_bit(pirq, &d->pirq_mask) &&
                   (--((irq_guest_action_t *)desc->action)->in_flight == 0) )
-                desc->handler->end(vector);
+                desc->handler->end(pirq);
              spin_unlock_irq(&desc->lock);
          }
      }
@@ -238,17 +233,14 @@ int pirq_guest_unmask(struct domain *d)
  int pirq_guest_bind(struct exec_domain *ed, int irq, int will_share)
  {
      struct domain      *d = ed->domain;
-    irq_desc_t         *desc;
+    irq_desc_t         *desc = &irq_desc[irq];
      irq_guest_action_t *action;
      unsigned long       flags;
-    int                 rc = 0, vector;
+    int                 rc = 0;
  
      if ( !IS_CAPABLE_PHYSDEV(d) )
          return -EPERM;
  
-    vector = platform_legacy_irq(irq) ? irq : IO_APIC_VECTOR(irq);
-    desc = &irq_desc[vector];
-
      spin_lock_irqsave(&desc->lock, flags);
  
      action = (irq_guest_action_t *)desc->action;
@@ -278,12 +270,12 @@ int pirq_guest_bind(struct exec_domain *ed, int irq, int will_share)
          desc->depth = 0;
          desc->status |= IRQ_GUEST;
          desc->status &= ~IRQ_DISABLED;
-        desc->handler->startup(vector);
+        desc->handler->startup(irq);
  
          /* Attempt to bind the interrupt target to the correct CPU. */
          if ( desc->handler->set_affinity != NULL )
              desc->handler->set_affinity(
-                vector, apicid_to_phys_cpu_present(ed->processor));
+                irq, apicid_to_phys_cpu_present(ed->processor));
      }
      else if ( !will_share || !action->shareable )
      {
@@ -309,13 +301,10 @@ int pirq_guest_bind(struct exec_domain *ed, int irq, int will_share)
  
  int pirq_guest_unbind(struct domain *d, int irq)
  {
-    irq_desc_t         *desc;
+    irq_desc_t         *desc = &irq_desc[irq];
      irq_guest_action_t *action;
      unsigned long       flags;
-    int                 i, vector;
-
-    vector = platform_legacy_irq(irq) ? irq : IO_APIC_VECTOR(irq);
-    desc = &irq_desc[vector];
+    int                 i;
  
      spin_lock_irqsave(&desc->lock, flags);
  
@@ -323,7 +312,7 @@ int pirq_guest_unbind(struct domain *d, int irq)
  
      if ( test_and_clear_bit(irq, &d->pirq_mask) &&
           (--action->in_flight == 0) )
-        desc->handler->end(vector);
+        desc->handler->end(irq);
  
      if ( action->nr_guests == 1 )
      {
@@ -332,7 +321,7 @@ int pirq_guest_unbind(struct domain *d, int irq)
          desc->depth   = 1;
          desc->status |= IRQ_DISABLED;
          desc->status &= ~IRQ_GUEST;
-        desc->handler->shutdown(vector);
+        desc->handler->shutdown(irq);
      }
      else
      {
@@ -346,3 +335,26 @@ int pirq_guest_unbind(struct domain *d, int irq)
      spin_unlock_irqrestore(&desc->lock, flags);    
      return 0;
  }
+
+/* Return 1 if 'irq' can be bound now; will_share != 0 permits sharing. */
+int pirq_guest_bindable(int irq, int will_share)
+{
+    irq_desc_t         *desc = &irq_desc[irq];
+    irq_guest_action_t *action;
+    unsigned long       flags;
+    int                 okay;
+
+    spin_lock_irqsave(&desc->lock, flags);
+    action = (irq_guest_action_t *)desc->action;
+    /*
+     * Bindable if not currently bound (1), or if guest-bound, shareable (2)
+     * and not at its share limit (3). 'action' is only dereferenced when
+     * IRQ_GUEST is set and it is non-NULL.
+     */
+    okay = !(desc->status & IRQ_GUEST) ? (action == NULL) :      /* 1 */
+        ((action != NULL) && action->shareable && will_share &&  /* 2 */
+         (action->nr_guests != IRQ_MAX_GUESTS));                 /* 3 */
+
+    spin_unlock_irqrestore(&desc->lock, flags);
+    return okay;
+}
diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c

index 048fca68ee2c2f8a1e3f8e02f0ddf851bef84880..5de96ec96dcebf1cac6c38ca8607f258b0ba2c01 100644 (file)
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -41,7 +41,7 @@ long do_physdev_op(physdev_op_t *uop)
  {
      physdev_op_t op;
      long         ret;
-    int          irq, vector;
+    int          irq;
  
      if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
          return -EFAULT;
@@ -87,13 +87,8 @@ long do_physdev_op(physdev_op_t *uop)
          if ( (irq = op.u.irq_op.irq) >= NR_IRQS )
              return -EINVAL;
          
-        op.u.irq_op.vector = vector = assign_irq_vector(irq);
-
-        if ( use_pci_vector() && !platform_legacy_irq(irq) )
-            set_intr_gate(vector, interrupt[vector]);
-        else
-            set_intr_gate(vector, interrupt[irq]);
-
+        op.u.irq_op.vector = assign_irq_vector(irq);
+        set_intr_gate(op.u.irq_op.vector, interrupt[irq]);
          ret = 0;
          break;
  
diff --git a/xen/include/asm-x86/io_apic.h b/xen/include/asm-x86/io_apic.h

index 86aa452961d1381748c169bb9ff44157f908819e..5c71c08f58fcf8e37380261f16a8f637b60a3952 100644 (file)
--- a/xen/include/asm-x86/io_apic.h
+++ b/xen/include/asm-x86/io_apic.h
@@ -14,7 +14,45 @@
  
  #ifdef CONFIG_X86_IO_APIC
  
+#ifdef CONFIG_PCI_MSI
  static inline int use_pci_vector(void) {return 1;}
+static inline void disable_edge_ioapic_vector(unsigned int vector) { }
+static inline void mask_and_ack_level_ioapic_vector(unsigned int vector) { }
+static inline void end_edge_ioapic_vector (unsigned int vector) { }
+#define startup_level_ioapic   startup_level_ioapic_vector
+#define shutdown_level_ioapic  mask_IO_APIC_vector
+#define enable_level_ioapic    unmask_IO_APIC_vector
+#define disable_level_ioapic   mask_IO_APIC_vector
+#define mask_and_ack_level_ioapic mask_and_ack_level_ioapic_vector
+#define end_level_ioapic       end_level_ioapic_vector
+#define set_ioapic_affinity    set_ioapic_affinity_vector
+
+#define startup_edge_ioapic    startup_edge_ioapic_vector
+#define shutdown_edge_ioapic   disable_edge_ioapic_vector
+#define enable_edge_ioapic     unmask_IO_APIC_vector
+#define disable_edge_ioapic    disable_edge_ioapic_vector
+#define ack_edge_ioapic        ack_edge_ioapic_vector
+#define end_edge_ioapic        end_edge_ioapic_vector
+#else
+static inline int use_pci_vector(void) {return 0;}
+static inline void disable_edge_ioapic_irq(unsigned int irq) { }
+static inline void mask_and_ack_level_ioapic_irq(unsigned int irq) { }
+static inline void end_edge_ioapic_irq (unsigned int irq) { }
+#define startup_level_ioapic   startup_level_ioapic_irq
+#define shutdown_level_ioapic  mask_IO_APIC_irq
+#define enable_level_ioapic    unmask_IO_APIC_irq
+#define disable_level_ioapic   mask_IO_APIC_irq
+#define mask_and_ack_level_ioapic mask_and_ack_level_ioapic_irq
+#define end_level_ioapic       end_level_ioapic_irq
+#define set_ioapic_affinity    set_ioapic_affinity_irq
+
+#define startup_edge_ioapic    startup_edge_ioapic_irq
+#define shutdown_edge_ioapic   disable_edge_ioapic_irq
+#define enable_edge_ioapic     unmask_IO_APIC_irq
+#define disable_edge_ioapic    disable_edge_ioapic_irq
+#define ack_edge_ioapic        ack_edge_ioapic_irq
+#define end_edge_ioapic        end_edge_ioapic_irq
+#endif
  
  #define IO_APIC_BASE(idx) \
                 ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \
diff --git a/xen/include/asm-x86/mach-default/irq_vectors_limits.h b/xen/include/asm-x86/mach-default/irq_vectors_limits.h

index a0664d7f67556790e70824d27b8cf59d994cec2c..b330026e6f7ff2d95cdb8fb3ad8b0642462379aa 100644 (file)
--- a/xen/include/asm-x86/mach-default/irq_vectors_limits.h
+++ b/xen/include/asm-x86/mach-default/irq_vectors_limits.h
@@ -1,8 +1,21 @@
  #ifndef _ASM_IRQ_VECTORS_LIMITS_H
  #define _ASM_IRQ_VECTORS_LIMITS_H
  
-/* MSI limits */
+#ifdef CONFIG_PCI_MSI
  #define NR_IRQS FIRST_SYSTEM_VECTOR
  #define NR_IRQ_VECTORS NR_IRQS
+#else
+#ifdef CONFIG_X86_IO_APIC
+#define NR_IRQS 224
+# if (224 >= 32 * NR_CPUS)
+# define NR_IRQ_VECTORS NR_IRQS
+# else
+# define NR_IRQ_VECTORS (32 * NR_CPUS)
+# endif
+#else
+#define NR_IRQS 16
+#define NR_IRQ_VECTORS NR_IRQS
+#endif
+#endif
  
  #endif /* _ASM_IRQ_VECTORS_LIMITS_H */
diff --git a/xen/include/xen/irq.h b/xen/include/xen/irq.h

index 9de76b8ca142d2cdd61fe0cf0ff7fc830b2cf361..f419f0c182f59eda6483a320ca259aae6acdde1a 100644 (file)
--- a/xen/include/xen/irq.h
+++ b/xen/include/xen/irq.h
@@ -71,5 +71,6 @@ struct exec_domain;
  extern int pirq_guest_unmask(struct domain *p);
  extern int pirq_guest_bind(struct exec_domain *p, int irq, int will_share);
  extern int pirq_guest_unbind(struct domain *p, int irq);
+extern int pirq_guest_bindable(int irq, int will_share);
  
  #endif /* __XEN_IRQ_H__ */
author	kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
	Tue, 10 May 2005 16:35:45 +0000 (16:35 +0000)
committer	kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
	Tue, 10 May 2005 16:35:45 +0000 (16:35 +0000)
xen/arch/ia64/irq.c		patch \| blob \| history
xen/arch/x86/acpi/boot.c		patch \| blob \| history
xen/arch/x86/io_apic.c		patch \| blob \| history
xen/arch/x86/irq.c		patch \| blob \| history
xen/arch/x86/physdev.c		patch \| blob \| history
xen/include/asm-x86/io_apic.h		patch \| blob \| history
xen/include/asm-x86/mach-default/irq_vectors_limits.h		patch \| blob \| history
xen/include/xen/irq.h		patch \| blob \| history